# Visualize word frequencies
import pandas as pd
import matplotlib.pyplot as plt

# Source table (comma-separated) and the destination of the optional export.
# Paths are relative to the working directory and use Windows separators.
inputfile = r"..\data\microwave_6extract_word.txt"
outputfile = r"..\data\microwave_6words.tsv"

# Load the table. With pandas' default header handling the first line of the
# file is taken as the header row; its labels are overwritten just below.
m_data = pd.read_csv(inputfile, sep=",", encoding="utf-8")
print(type(m_data))  # expected: pandas DataFrame

# Positional rename; raises if the file does not have exactly four columns.
m_data.columns = ["word", "freq", "form", "0"]

# Optional export of the relabelled table (currently disabled):
# m_data.to_csv(outputfile, index=0, header=1, encoding='utf-8')

# Only rows 40..89 (at most 50 entries) of the two plotted columns are kept.
m_word, m_freq = (
    m_data[column].values[40:90].tolist() for column in ("word", "freq")
)

print(type(m_word))  # plain Python list


# Bar chart: one bar per selected word, bar height is that word's frequency.
plt.bar(
    m_word,
    m_freq,
    label="freq",
    color="steelblue",
    alpha=0.8,
)

# Title and axis names.
plt.title("word's frequency")
plt.ylabel("frequency")
plt.xlabel("word")

# Tilt the word labels on the x axis by 75 degrees.
plt.xticks(rotation=75)

# Show the legend entry ('freq') and display the figure.
plt.legend()
plt.show()
